################################################################################
### Replication Code: Statistical Analysis
################################################################################
#
# Paper: Rationalizing Protest Participation  
#
# Authors: Tim Baule, Jonathan Bothner, Maximilian Kähny
#
# Software: R version 4.4.0 using Windows
#
################################################################################

# load packages
library(haven)
library(dplyr)
library(tidyr)
library(spdep)
library(Matrix)
library(readr)
library(spatialreg)
library(sphet)
library(spatialprobit)
library(ProbitSpatial)
library(igraph)
library(stargazer)
library(vtable)
library(sandwich)
library(lmtest)
library(modelsummary)

# load data
rm(list = ls())
setwd("C:/Users/timba/OneDrive - Universität Bayreuth (1)/Uni/Research General/Protest Network Project/empirics/do files and skripts/final")
anes <- read_dta("anes_timeseries_2020_clean.dta")
spatial_ids <- read_csv("spatial_matrix_ids.csv")
pol_cluster_matrix_ids = read_csv("pol_cluster_matrix_ids.csv")
political_clusters_3to11 = read_csv("political_clusters_3to11.csv")


# merge files
anes <- anes %>%
  rename(id = V200001) %>%
  inner_join(pol_cluster_matrix_ids, by = "id") %>%
  inner_join(political_clusters_3to11, by = "id") %>%
  arrange(id)

# create variable lists
state_id_var <- "state_id"
all_vars <- c("protest", "leftright", "leftright2", "reli", "trust","age", "educ", "inc", "longitude", "latitude", "black","male", "health", "married", "children", "volunteer", "community_service", "attend_church", "dummy_NE", "dummy_MW", "dummy_South", state_id_var)

# drop observations with missing values
cc <- complete.cases(anes[, all_vars])
anes_cc <- anes[cc, ]

### define clusters
# environment
anes_cc$environment = 0
for(i in 1:16){
  anes_cc$environment[anes_cc[[paste0("V202205y", i)]] %in% c(17,18)] = 1
}
# blm
anes_cc$blm = 0
for(i in 1:16){
  anes_cc$blm[anes_cc[[paste0("V202205y", i)]] %in% c(32,34) & anes_cc$V202174>50 & anes_cc$V202174 <=100] = 1
}
anes_cc$blm_pure = 0
for(i in 1:16){
  anes_cc$blm_pure[anes_cc[[paste0("V202205y", i)]] %in% c(32,34)] = 1
}
# immigration
anes_cc$immigration = 0
for(i in 1:16){
  anes_cc$immigration[anes_cc[[paste0("V202205y", i)]] %in% c(41)] = 1  
}
anes_cc$immigration_pure = 0
for(i in 1:16){
  anes_cc$immigration_pure[anes_cc[[paste0("V202205y", i)]] %in% c(41)] = 1
}
# health
anes_cc$health_clu = 0
for(i in 1:16){
  anes_cc$health_clu[anes_cc[[paste0("V202205y", i)]] %in% c(82,23,24)] = 1
}


##################################################
### Data Prep Function
##################################################

### define function
data_setup = function(data, decision_cluster, selection_cluster_pol, selection_cluster_protest, neig){
  
  # filtering of clusters
  if (decision_cluster == "none") {
    anes_trim = anes_cc
  } else if (decision_cluster == "pol") {
    anes_trim = anes_cc %>%
      filter(c3 == selection_cluster_pol)
  } else {
    anes_trim = anes_cc %>%
      filter(.[[selection_cluster_protest]] == 1)
  }
  
  # variables for mahalanobis distance
  covariate_names <- c("leftright", "reli", "trust", "age","educ", "inc", "longitude", "latitude", "volunteer", "community_service", "attend_church")
  Xcov <- as.matrix(anes_trim[, covariate_names])
  n <- nrow(Xcov)
  
  # Mahalanobis distance and proximity matrix
  Sigma <- cov(Xcov)
  D2 <- matrix(0, n, n)
  for (i in seq_len(n)) {
    D2[i, ] <- mahalanobis(Xcov, center = Xcov[i, ], cov = Sigma)
  }
  D <- sqrt(D2)
  invD <- 1 / D
  diag(invD) <- 0
  invD[!is.finite(invD)] <- 0
  
  # define number of neighbors
  Wbin_cc <- matrix(0, n, n)
  for (i in seq_len(n)) {
    top_neig <- order(invD[i, ], decreasing = TRUE)[1:neig]
    Wbin_cc[i, top_neig] <- 1
  }
  
  # Create spatial weights object
  Wmat_cc <- Matrix(Wbin_cc, sparse = TRUE)
  listw_cc <- mat2listw(Wmat_cc, style = "W", zero.policy = FALSE)
  stopifnot(all(rowSums(Wbin_cc) == neig))
  
  # return
  return(list(listw_cc = listw_cc, anes_trim = anes_trim))
}


##################################################
### Summary Statistics
##################################################

### Replication of Table 1: Summary Statistics

# filter used observations
neig = 15                             # spillover definition: set to 10, 15, or 20, depending on the specification
decision_cluster = "none"              # options: "none"; "pol"; "pro"
alldata = data_setup(anes_cc, decision_cluster, selection_cluster_pol, selection_cluster_protest, neig)
listw_cc = alldata$listw_cc
anes_trim = alldata$anes_trim

# political attitudes
range <- seq(0,100)
poldata <- select(anes_trim, id, V202163, V202162, V202180, V202179, V202164, V202161, V202166, V202160, V202172, V202183, V202159, V202171, V202173, V202174, V202175, V202176, V202177, V202186)
poldata <- rename(poldata, "laborunions" =V202162, "conservatives" =V202164, "liberals" =V202161, "homosexuals"=V202166, "feminists" =V202160, "chrisfunda" =V202159, "police" =V202171, "blm"=V202174)
# subset, (2) filter data for missing values
poldata <- subset(poldata,	laborunions %in% range &	conservatives %in% range & 	liberals %in% range & chrisfunda %in% range & 	police %in% range & blm %in% range)
poldata <- select(poldata, laborunions, conservatives, liberals, feminists, chrisfunda, police, blm)

# distance variables: st_leftright st_reli st_trust st_age st_educ st_inc st_longitude
summary <- select(anes_trim, protest, leftright, leftright2, reli, trust, age, educ, inc, black, male, health, married, children, volunteer, community_service, attend_church,, dummy_NE, dummy_MW, dummy_South, longitude, latitude)
summary <- subset(summary, !apply(is.na(summary),1, any))
summary = relocate(summary, protest, age, educ, inc, black, male, health, married, children, leftright, leftright2, reli, trust, volunteer, community_service, attend_church,, dummy_NE, dummy_MW, dummy_South, longitude, latitude)

# summary stats
st(summary, out = "latex", digits = 2, fixed.digits = TRUE, numformat = NA, labels = c("Protest", "Age", "Education", "Income", "Black", "Male", "Health Status", "Married", "Children", "Left-right-scale", "Dev. Left-right-scale", "Religiosity", "Trust", "Volunteering", "Community Participation", "Church Attendance", "Dummy North-East", "Dummy Mid-West", "Dummy South", "Longitude", "Latitude"))
st(poldata, out = "latex", digits = 2, fixed.digits = TRUE, numformat = NA,
   labels = c("Labor Unions", "Conservatives", "Liberals", "Feminists", "Christian Fundamentalists", "Police", "Black Lives Matter"))

# stats 
print("healthcare, race relations, environment, immigration")
sum(anes_trim$health_clu)/dim(anes_trim)[1];sum(anes_trim$blm_pure)/dim(anes_trim)[1];sum(anes_trim$environment)/dim(anes_trim)[1];sum(anes_trim$immigration_pure)/dim(anes_trim)[1];


##################################################
### Preliminary OLS Regression
##################################################

# covariates
formula_str <- "protest ~ male + age + I(age^2) + black + inc + educ + leftright + leftright2 + health + married + children + reli + trust + volunteer + community_service + attend_church + dummy_NE + dummy_MW + dummy_South"

# Estimation 
ols1 <- lm(formula_str, data = anes_cc)
ols2 <- lm(formula_str, data = subset(anes_cc, c3 == 1))
ols3 <- lm(formula_str, data = subset(anes_cc, c3 == 2))
ols4 <- lm(formula_str, data = subset(anes_cc, c3 == 3))

# Robust SEs
robust1 <- vcovHC(ols1, type = "HC1")
robust2 <- vcovHC(ols2, type = "HC1")
robust3 <- vcovHC(ols3, type = "HC1")
robust4 <- vcovHC(ols4, type = "HC1")

# Stargazer table with robust SEs
stargazer(ols1, ols2, ols3, ols4,
          se = list(sqrt(diag(robust1)), 
                    sqrt(diag(robust2)), 
                    sqrt(diag(robust3)), 
                    sqrt(diag(robust4))),
          digits = 3,
          column.labels = c("Model 1", "Model 2", "Model 3", "Model 4"),
          type = "latex")



stargazer(ols1, ols2, ols3, ols4,
          se = list(sqrt(diag(robust1)), 
                    sqrt(diag(robust2)), 
                    sqrt(diag(robust3)), 
                    sqrt(diag(robust4))),
          column.labels = c("Model 1", "Model 2", "Model 3", "Model 4"),
          covariate.labels = c("Intercept", "Male", "Age", "Age$^2$", "Black", 
                               "Income", "Education", "Left Right Scale", 
                               "Left Right Dev.", "Health", "Married", 
                               "Number of children", "Religious", "Trust Others", 
                               "Volunteering", "Community Participation", 
                               "Church Attendance", "North East", "Mid West", "South"),
          digits = 3,
          type = "text")







##################################################
### Spatial Estimation
##################################################

### Replication for all spatial estimations for the year 2020
# vary "neig" from 10, 15, and 20 to obtain all results 
# all spatial regressions are run below first and then summarized jointly 
# this code yields Tables 2, 3, 4, 5, 6, and 7

# covariates
formula_str = "protest ~ male + age + I(age^2) + black + inc + educ + leftright + leftright2  + health + married + children + reli + trust + volunteer + community_service + attend_church + dummy_NE + dummy_MW + dummy_South"
# neigbors (this must be varied in order to obtain estimates for each cluster size)
neig = 20

### 1) baseline
decision_cluster = "none"              # options: "none"; "pol"; "pro"
alldata = data_setup(anes_cc, decision_cluster, selection_cluster_pol, selection_cluster_protest, neig)
listw_cc = alldata$listw_cc
anes_trim = alldata$anes_trim
# estimation
baseline_10 <- spreg(
  formula = as.formula(formula_str),
  data = anes_trim,
  listw = listw_cc,
  model = "sarar",
  het = TRUE
)
baseline_10_effects <- impacts(baseline_10, listw = listw_cc, R = 1000)


### 2) political cluster

# cluster: 1
decision_cluster = "pol"              # options: "none"; "pol"; "pro"
selection_cluster_pol = 1             # options: 1; 2; 3
alldata = data_setup(anes_cc, decision_cluster, selection_cluster_pol, selection_cluster_protest, neig)
listw_cc = alldata$listw_cc
anes_trim = alldata$anes_trim
# estimation
pol_1 <- spreg(
  formula = as.formula(formula_str),
  data = anes_trim,
  listw = listw_cc,
  model = "sarar",
  het = TRUE
)
pol_1_effects <- impacts(pol_1, listw = listw_cc, R = 1000, zstats = TRUE)
print("Cluster Protest Partizipation")
sum(anes_trim$protest)/dim(anes_trim)[1]
dim(anes_trim)[1]


# cluster: 2
decision_cluster = "pol"              # options: "none"; "pol"; "pro"
selection_cluster_pol = 2             # options: 1; 2; 3
alldata = data_setup(anes_cc, decision_cluster, selection_cluster_pol, selection_cluster_protest, neig)
listw_cc = alldata$listw_cc
anes_trim = alldata$anes_trim
# estimation
pol_2 <- spreg(
  formula = as.formula(formula_str),
  data = anes_trim,
  listw = listw_cc,
  model = "sarar",
  het = TRUE
)
pol_2_effects <- impacts(pol_2, listw = listw_cc, R = 1000)
print("Cluster Protest Partizipation")
sum(anes_trim$protest)/dim(anes_trim)[1]
dim(anes_trim)[1]


# cluster: 3
decision_cluster = "pol"              # options: "none"; "pol"; "pro"
selection_cluster_pol = 3             # options: 1; 2; 3
alldata = data_setup(anes_cc, decision_cluster, selection_cluster_pol, selection_cluster_protest, neig)
listw_cc = alldata$listw_cc
anes_trim = alldata$anes_trim
# estimation
pol_3 <- spreg(
  formula = as.formula(formula_str),
  data = anes_trim,
  listw = listw_cc,
  model = "sarar",
  het = TRUE
)
pol_3_effects <- impacts(pol_3, listw = listw_cc, R = 1000)
print("Cluster Protest Partizipation")
sum(anes_trim$protest)/dim(anes_trim)[1]
dim(anes_trim)[1]


### 3) protest cluster

# cluster: blm
decision_cluster = "pro"              # options: "none"; "pol"; "pro"
selection_cluster_protest = "blm"     # options: "blm"; "environment"; "immigration"; ""
alldata = data_setup(anes_cc, decision_cluster, selection_cluster_pol, selection_cluster_protest, neig)
listw_cc = alldata$listw_cc
anes_trim = alldata$anes_trim
# estimation
pro_blm <- spreg(
  formula = as.formula(formula_str),
  data = anes_trim,
  listw = listw_cc,
  model = "sarar",
  het = TRUE
)
pro_blm_effects <- impacts(pro_blm, listw = listw_cc, R = 1000)
print("Cluster Protest Partizipation")
sum(anes_trim$protest)/dim(anes_trim)[1]
dim(anes_trim)[1]

# cluster: environment
decision_cluster = "pro"              # options: "none"; "pol"; "pro"
selection_cluster_protest = "environment"     # options: "blm"; "environment"; "immigration"; "health_clu"
alldata = data_setup(anes_cc, decision_cluster, selection_cluster_pol, selection_cluster_protest, neig)
listw_cc = alldata$listw_cc
anes_trim = alldata$anes_trim
# estimation
pro_environment <- spreg(
  formula = as.formula(formula_str),
  data = anes_trim,
  listw = listw_cc,
  model = "sarar",
  het = TRUE
)
pro_environment_effects <- impacts(pro_environment, listw = listw_cc, R = 1000)
print("Cluster Protest Partizipation")
sum(anes_trim$protest)/dim(anes_trim)[1]
dim(anes_trim)[1]

# cluster: immigration
decision_cluster = "pro"              # options: "none"; "pol"; "pro"
selection_cluster_protest = "immigration"     # options: "blm"; "environment"; "immigration"; "health_clu"
alldata = data_setup(anes_cc, decision_cluster, selection_cluster_pol, selection_cluster_protest, neig)
listw_cc = alldata$listw_cc
anes_trim = alldata$anes_trim
# estimation
pro_immigration <- spreg(
  formula = as.formula(formula_str),
  data = anes_trim,
  listw = listw_cc,
  model = "sarar",
  het = TRUE
)
pro_immigration_effects <- impacts(pro_immigration, listw = listw_cc, R = 1000)
print("Cluster Protest Partizipation")
sum(anes_trim$protest)/dim(anes_trim)[1]
dim(anes_trim)[1]

# cluster: health_clu
decision_cluster = "pro"              # options: "none"; "pol"; "pro"
selection_cluster_protest = "health_clu"     # options: "blm"; "environment"; "immigration"; "health_clu"
alldata = data_setup(anes_cc, decision_cluster, selection_cluster_pol, selection_cluster_protest, neig)
listw_cc = alldata$listw_cc
anes_trim = alldata$anes_trim
# estimation
pro_health_clu <- spreg(
  formula = as.formula(formula_str),
  data = anes_trim,
  listw = listw_cc,
  model = "sarar",
  het = TRUE
)
pro_health_clu_effects <- impacts(pro_health_clu, listw = listw_cc, R = 1000)
print("Cluster Protest Partizipation")
sum(anes_trim$protest)/dim(anes_trim)[1]
dim(anes_trim)[1]


#######################################
### Results
#######################################

### Baseline
print("Spatial Coefficients")
summary(baseline_10)
print("Direct Effects, Indirect Effects")
summary(baseline_10_effects, zstats=TRUE, short =TRUE)

### Pol Cluster
# Cluster 1
print("Spatial Coefficients")
summary(pol_1)
print("Direct Effects, Indirect Effects")
summary(pol_1_effects, zstats=TRUE, short =TRUE)
# Cluster 2
print("Spatial Coefficients")
summary(pol_2)
print("Direct Effects, Indirect Effects")
summary(pol_2_effects, zstats=TRUE, short =TRUE)
# Cluster 3
print("Spatial Coefficients")
summary(pol_3)
print("Direct Effects, Indirect Effects")
summary(pol_3_effects, zstats=TRUE, short =TRUE)

### Protest Cluster
# Cluster blm
print("Spatial Coefficients")
summary(pro_blm)
print("Direct Effects, Indirect Effects")
summary(pro_blm_effects, zstats=TRUE, short =TRUE)
# Cluster immigration
print("Spatial Coefficients")
summary(pro_immigration)
print("Direct Effects, Indirect Effects")
summary(pro_immigration_effects, zstats=TRUE, short =TRUE)
# Cluster health_clu
print("Spatial Coefficients")
summary(pro_health_clu)
print("Direct Effects, Indirect Effects")
summary(pro_health_clu_effects, zstats=TRUE, short =TRUE)
# Cluster environment
print("Spatial Coefficients")
summary(pro_environment)
print("Direct Effects, Indirect Effects")
summary(pro_environment_effects, zstats=TRUE, short =TRUE)






